library(Matrix)
library(foreign)
library(reshape2)
library(doBy)

rm(list=ls(all=TRUE))

for (iteration in 1970:1999) {
  
setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\1 inputs\\patent fetch\\output")
set1 <- read.dta("subsidiaries_patents_byyear.dta")
set1 <- set1[which(set1$appyear==iteration), ]
keep1 <- as.vector(set1$cusip)
keep1 <- unique(keep1)

setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\1 inputs\\r and d data fetch")
set2 <- read.dta("r and d data_bloom.dta")
keep2 <- as.vector(set2$cusip)
keep2 <- unique(keep2)

setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\1 inputs\\ciq fetch\\output")
set3 <- read.dta("subsidiaries_revenue_bloom.dta")
keep3 <- as.vector(set3$cusip)  
keep3 <- unique(keep3)

keep1 <- keep1[keep1 %in% keep2]
keep1 <- keep1[keep1 %in% keep3]
  

########################SPILLTECH
setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\1 inputs\\patent fetch\\output")
statadata1 <- read.dta("subsidiaries_patents_byyear.dta")
statadata1 <- statadata1[statadata1$cusip %in% keep1, ]
statadata1 <- statadata1[ which(statadata1$appyear==iteration), ]
statadata1$appyear <- NULL

#format patent data
x1 <- as.matrix(statadata1)
firms <- x1[,1] 
x1 <- x1[,-1]
x1 <- apply(x1, 2, as.numeric) 
rownames(x1) <- firms

x1 <- x1[, colSums(x1) != 0]

#x1 is 4 x 3. it has 4 firms and 3 tech fields
#x1 <- matrix(c(1,2,3,4,5,6,7,8,9,10,11,12),4,3)

#convert counts to proportions
x1 <- x1/rowSums(x1) 

#transpose so that x1 has tech field rows and firm columns
T <- t(x1)

#get denominators
dot <- t(T) %*% T
diag <- diag(dot)
diag <- diag^-1

den <- matrix(0,length(diag),length(diag))
for (i in 1:length(diag)) {
  den[i,i] <- sqrt(diag[i])
}

#normalize T
T_tilda <- T %*% den

#TECH
TECH <- t(T_tilda) %*% T_tilda

#derive X_tilda
X_tilda_num_only <- t(T)

#get denominators
dot <- t(X_tilda_num_only) %*% X_tilda_num_only
diag <- diag(dot)
diag <- diag^-1

den <- matrix(0,length(diag),length(diag))
for (i in 1:length(diag)) {
  den[i,i] <- sqrt(diag[i])
}

#normalize X
X_tilda <- X_tilda_num_only %*% den

#derive omega
omega <- t(X_tilda) %*% X_tilda

#M measure
spilltech_malcorr <- t(T_tilda) %*% omega %*% T_tilda
rownames(spilltech_malcorr) <- rownames(x1)
colnames(spilltech_malcorr) <- rownames(x1)





#########################GET R&D DATA
setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\1 inputs\\r and d data fetch")
statadata2 <- read.dta("r and d data_bloom.dta")
statadata2 <- statadata2[statadata2$cusip %in% keep1, ]

#format r and d data
x2 <- as.matrix(statadata2)
firms2 <- x2[,1] 
x2 <- x2[,-1]
x2 <- apply(x2, 2, as.numeric) 
rownames(x2) <- firms2







########################SPILLSIC
setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\1 inputs\\ciq fetch\\output")
statadata3 <- read.dta("subsidiaries_revenue_bloom.dta")
statadata3 <- statadata2[statadata3$cusip %in% keep1, ]

#format revenue data
x3 <- as.matrix(statadata3)
firms3 <- x3[,1] 
x3 <- x3[,-1]
x3 <- apply(x3, 2, as.numeric) 
rownames(x3) <- firms3
x3[x3 < 0] <- 0


x3 <- x3[, colSums(x3) != 0]

#convert counts to proportions
x3 <- x3/rowSums(x3) 

#transpose so that x1 has tech field rows and firm columns
T <- t(x3)

#get denominators
dot <- t(T) %*% T
diag <- diag(dot)
diag <- diag^-1

den <- matrix(0,length(diag),length(diag))
for (i in 1:length(diag)) {
  den[i,i] <- sqrt(diag[i])
}

#normalize T
T_tilda <- T %*% den

#TECH
TECH <- t(T_tilda) %*% T_tilda

#derive X_tilda
X_tilda_num_only <- t(T)

#get denominators
dot <- t(X_tilda_num_only) %*% X_tilda_num_only
diag <- diag(dot)
diag <- diag^-1

den <- matrix(0,length(diag),length(diag))
for (i in 1:length(diag)) {
  den[i,i] <- sqrt(diag[i])
}

#normalize X
X_tilda <- X_tilda_num_only %*% den

#derive omega
omega <- t(X_tilda) %*% X_tilda

#M measure
spillsic_malcorr <- t(T_tilda) %*% omega %*% T_tilda
rownames(spillsic_malcorr) <- rownames(x3)
colnames(spillsic_malcorr) <- rownames(x3)







#compute spillsic
diag(spillsic_malcorr) <- 0
spillsic_mal <- spillsic_malcorr %*% x2

#compute spilltech
diag(spilltech_malcorr) <- 0
spilltec_mal <- spilltech_malcorr %*% x2

################SAVE OUTPUTS
spilltec <- data.frame(spilltec_mal)
spillsic <- data.frame(spillsic_mal)
spilltec$ciq <- rownames(spilltec_mal)
spillsic$ciq <- rownames(spillsic_mal)

setwd("C:\\Users\\rthavarajah\\Desktop\\Paper Stuff\\Bloom Master\\Spillovers Computation\\5 intermediate - R code Mahalanobis\\output")
write.dta(spilltec, file=paste("spilltec_mal", iteration, ".dta", sep=""))
spilltech_malcorr <- data.frame(spilltech_malcorr)
write.dta(spilltech_malcorr, file=paste("spilltec_mal_corr", iteration, ".dta", sep=""))
write.dta(spillsic, "spillsic_mal.dta")



}
